
# -----------------------------------------------------------
rm(list=ls())
library(plm)
library(gmm)

# -----------------------------------------------------------
# Define a function to calculate the spillover productivity
swfn <- function(w) {
  sw <- c()
  for(j in 1:N) {
    sw <- c(sw, sum(w[-j])/(N-1))
  }
  return(sw)
}
# -----------------------------------------------------------
# The following function re-arrange a matrix into a vector
long <- function(X) {
  X2 <- c()
  for(i in 1:nrow(X)) {
    X2 <- c(X2, X[i,])
  }
  return(X2)
}
# -----------------------------------------------------------

# ========================================================
# DGP

set.seed(123)

# Parameters
N <- 100; T <- 10; R <- 1000
d0 <- 0.2; d1 <- 0.55; d2 <- 0.4; d3 <- 0.5    # productivity parameters
ak <- 0.25; am <- 0.65    # production function parameters
b1 <- 0.8; b2 <- 0.1    # I policy function parameters
r0 <- 0.01; r1 <- 0.6; r2 <- 0.3     # G parameters
de <- c(0.05, 0.075, 0.1, 0.125, 0.15)   # depreciation rate

INITIAL <- DFDK <- DFDM <- RHO1 <- RHO2 <- RHO3 <- c()   # matrixes for the estimated elasticities

for(ii in 1:R) {
  
  # initial values
  w0 <- runif(N, 1, 3)
  K0 <- runif(N, 10, 200)
  G0 <- runif(N, 0, 1); 
  I0 <- K0^b1*exp(b2*w0)
  M0 <- (am*K0^ak*exp(w0))^(1/(1-am))

  # errors
  eta <- matrix(rnorm(N*T, 0, sqrt(0.07)), N, T)
  xi <- matrix(rnorm(N*T, 0, sqrt(0.04)), N, T)   
  eps <- matrix(rnorm(N*T, 0, sqrt(0.01)), N, T)
  
  # Evolution processes
  # We will first generate state variables, and then control variables whose values
  # depend on state variables.
  Y <- K <- M <- I <- W <- G <- matrix(NA, N, T)
  
  for(i in 1:T) {
    if(i==1) {
      W[,i] <- d0 + d1*w0 + d2*swfn(w0) +d3*G0 + xi[,i]
      K[,i] <- (1-de)*K0 + I0
      G[,i] <- r0 + r1*G0 + r2*W[,i] + eps[,i]; 
      I[,i] <- K[,i]^b1*exp(b2*W[,i])
      M[,i] <- (am*K[,i]^ak*exp(W[,i]))^(1/(1-am))
      Y[,i] <- K[,i]^ak*M[,i]^am*exp(W[,i]+eta[,i])
    }
    else {
      W[,i] <- d0 + d1*W[,(i-1)] + d2*swfn(W[,(i-1)]) +d3*G[,(i-1)] + xi[,i]
      K[,i] <- (1-de)*K[,(i-1)] + I[,(i-1)]
      G[,i] <- r0 + r1*G[,(i-1)] + r2*W[,i] + eps[,i]; 
      I[,i] <- K[,i]^b1*exp(b2*W[,i])
      M[,i] <- (am*K[,i]^ak*exp(W[,i]))^(1/(1-am))
      Y[,i] <- K[,i]^ak*M[,i]^am*exp(W[,i]+eta[,i])
    }
  }
  
  pm <- mean(exp(eta))
  data <- data.frame(id=rep(1:N, each = T), year=rep(1:T, N), Y=long(Y), K=long(K), M=long(M), G=long(G), sm=pm*long(M)/long(Y) )
  
  # ----------------------------------------------
  # Start the estimation
  # Step 1
  h.lnbe <- mean(log(data$sm)); h.eta <- - log(data$sm) + h.lnbe; h.e <- mean(exp(h.eta))
  h.betam <- exp(h.lnbe)/h.e
  
  # ----------------------------------------------
  data <- pdata.frame(data)
  data$K1 <- lag(data$K); data$M1 <- lag(data$M); data$G1 <- lag(data$G)
  data$y <- log(data$Y); data$k <- log(data$K); data$m <- log(data$M)
  data$k1 <- log(data$K1); data$m1 <- log(data$M1);
  data <- data[complete.cases(data),]
  data <- as.data.frame(data)
  print(ii)
  
  # ----------------------------------------------
  # Step 2
  # Generate weights & variables related to weights
  for(i in 1:nrow(data)) {
    s1.t <- (as.numeric(data$year)==as.numeric(data$year[i])); s1.t[i] <- 0
    s1.b <- sum(s1.t)
    s1 <- if(s1.b>0) s1.t/s1.b else rep(0, length(s1.t));
    data$sk1[i] <- sum(s1*data$k1); data$sm1[i] <- sum(s1*data$m1); 
  }
  
  # Define functions: "sphi", "phi"
  sphi <- function(beta) {
    (1-h.betam)*data$sm1 - h.lnbe - log(1/pm) - beta[1]*data$sk1 
  }
  
  phi <- function(beta) {
    (1-h.betam)*data$m1 - h.lnbe - log(1/pm) - beta[1]*data$k1
  }
  
  # Optimization
  RHO.temp <- OBJ <- BK <- c()
  for(ik in seq(0.01, 1, by=0.01)) {
    y.star <- data$y - h.betam*data$m - ik*data$k 
    p1 <- as.vector(phi(ik)); p2 <- as.vector(sphi(ik)); 
    var.poly <- poly(cbind(p1, data$G1, p2), degree = 2, raw = TRUE)
    mod1 <- lm(y.star~var.poly)
    RHO.temp <- cbind(RHO.temp, mod1$coeff)
    OBJ <- cbind(OBJ, sum(mod1$residuals^2))
    BK <- cbind(BK, ik)
  }
  target <- which.min(OBJ)
  
  # results - elas
  h.betak <- BK[target]
  DFDK <- cbind(DFDK, h.betak); DFDM <- cbind(DFDM, h.betam)
  # results - rho1 and rho2
  coef <- RHO.temp[2:10,target]
  w1 <- as.vector(phi(c(h.betak))); sw1 <- as.vector(sphi(c(h.betak)))
  
  RHO1  <- cbind(RHO1, coef[1] + 2*coef[2]*w1 + coef[4]*data$G1 + coef[7]*sw1)
  RHO2  <- cbind(RHO2, coef[6] + coef[7]*w1 + coef[8]*data$G1 + 2*coef[9]*sw1)  
  RHO3  <- cbind(RHO3, coef[3] + coef[4]*w1 + 2*coef[5]*data$G1 + coef[8]*sw1)
  
}
RHO4 <- RHO2*RHO3

rmse_k <- function(x, truevalue=0.25) sqrt(mean((x-truevalue)^2))
rmse_m <- function(x, truevalue=0.65) sqrt(mean((x-truevalue)^2))
rmse_r <- function(x) sqrt(mean(x^2))
# define a function to calculate the mean absolute deviation
mad_k <- function(x, truevalue=0.25) mean(abs(x-truevalue))
mad_m <- function(x, truevalue=0.65) mean(abs(x-truevalue))
mad_r <- function(x) mean(abs(x))

n11 <- mean(DFDK); n12 <- median(DFDK); n13 <- sd(DFDK); n14 <- rmse_k(DFDK); n15 <- mad_k(DFDK)
n21 <- mean(DFDM); n22 <- median(DFDM); n23 <- sd(DFDM); n24 <- rmse_m(DFDM); n25 <- mad_m(DFDM)
medRHO1 <- apply(RHO1, MARGIN = 2, median)
n31 <- mean(medRHO1); n32 <- median(medRHO1); n33 <- sd(medRHO1); n34 <- median(apply(RHO1-d1, MARGIN = 2, rmse_r)); n35 <- median(apply(RHO1-d1, MARGIN = 2, mad_r))
medRHO2 <- apply(RHO2, MARGIN = 2, median)
n41 <- mean(medRHO2); n42 <- median(medRHO2); n43 <- sd(medRHO2); n44 <- median(apply(RHO2-d2, MARGIN = 2, rmse_r)); n45 <- median(apply(RHO2-d2, MARGIN = 2, mad_r))
medRHO3 <- apply(RHO3, MARGIN = 2, median)
n51 <- mean(medRHO3); n52 <- median(medRHO3); n53 <- sd(medRHO3); n54 <- median(apply(RHO3-d3, MARGIN = 2, rmse_r)); n55 <- median(apply(RHO3-d3, MARGIN = 2, mad_r))
medRHO4 <- apply(RHO4, MARGIN = 2, median); 
n61 <- mean(medRHO4); n62 <- median(medRHO4); n63 <- sd(medRHO4); n64 <- median(apply(RHO4-d2*d3, MARGIN = 2, rmse_r)); n65 <- median(apply(RHO4-d2*d3, MARGIN = 2, mad_r))


Table <- matrix(c(n11, n12, n13, n14, n15, 
                  n21, n22, n23, n24, n25,
                  n31, n32, n33, n34, n35,
                  n41, n42, n43, n44, n45,
                  n51, n52, n53, n54, n55,
                  n61, n62, n63, n64, n65), 6, 5, byrow = TRUE)
Table



